Setup

Discharge calcs

Here we load the data from the pressure transducers and identify any jumps.

#Load and trim data----

##This is the data from the pressure transducers 
raw_cm <- read_csv('data/raw/sensor_2018_2019.csv') %>%
  mutate(datetime = round_date(mdy_hm(datetime),'15 mins')) %>% 
  select(Event, site, datetime, stage, temp)
  
#Find data jumps----

## Jumps can come from sensors being moved when data was 
## downloaded or sensors were disturbed by water. 

jumps <- raw_cm %>%
  group_by(site) %>%
  arrange(site,datetime) %>%
  mutate(lag_event = lag(Event), #creates lagged version
         download = ifelse((Event - lag_event) >= 1 | is.na(lag_event), 0,1),
         session = cumsum(download)) 

#Make a dataframe of the first and last X of measurements for each session. X is the of measurements to average over. 
jump_hunter <- function(df = jumps, x = 10){
  last_x <- df %>%
    group_by(site,session) %>%
    # Get the last X obs per site and session
    slice_tail(n = x) %>%
    #Take median stage and lasttime recorded
    summarize(last = median(stage,na.rm=T),
              lasttime = max(datetime)) 
  
#Same as above but for the first X slice of data
  first_x <- jumps %>%
    group_by(site,session) %>%
    slice_head(n = x) %>%
    summarize(first = median(stage,na.rm=T),
              firsttime = min(datetime))
  
#Join these datasets and find the median stage difference and the time diff. The time diff is critical because with big time gaps the jumps are likely wrong. 
  
  plunge <- inner_join(first_x,last_x) %>%
    group_by(site) %>%
    mutate(lead_first = lead(first,1,last[n()]),
           event_jump =  last-lead_first,
           cume_jump = rev(cumsum(rev(event_jump))),
           timediff = lasttime - lead(firsttime,1,lasttime[n()])) %>%
    select(site,session,cume_jump,lasttime)
  
  
  return(plunge)
}

jumped <- jumps %>%
  inner_join(jump_hunter(jumps)) %>%
  #Varsovia (concrete channel) shouldn't jump so removed jumps here 
  #Attached to concrete channel in horizontal setting. 
  mutate(cume_jump = ifelse(site == 'varsovia',0, cume_jump)) %>%
  mutate(jumped_stage = stage - cume_jump)

Balas examine

t.xts <- jumped %>%
   ungroup() %>%
   dplyr::filter(site == 'balas') %>%
   dplyr::select(stage,jumped_stage,datetime) %>%
   xts(. %>% select(-datetime),order.by=.$datetime)


 dygraph(t.xts) %>%
   dyOptions(useDataTimezone = T)

Varsovia examine

t.xts <- jumped %>%
   ungroup() %>%
   dplyr::filter(site == 'varsovia') %>%
   dplyr::select(stage,jumped_stage,datetime) %>%
   xts(. %>% select(-datetime),order.by=.$datetime)


 dygraph(t.xts) %>%
   dyOptions(useDataTimezone = T)

Helado examine

t.xts <- jumped %>%
   ungroup() %>%
   dplyr::filter(site == 'helado') %>%
   dplyr::select(stage,jumped_stage,datetime) %>%
   xts(. %>% select(-datetime),order.by=.$datetime)


 dygraph(t.xts) %>%
   dyOptions(useDataTimezone = T)

Raices examine

t.xts <- jumped %>%
   ungroup() %>%
   dplyr::filter(site == 'raices') %>%
   dplyr::select(stage,jumped_stage,datetime) %>%
   xts(. %>% select(-datetime),order.by=.$datetime)


 dygraph(t.xts) %>%
   dyOptions(useDataTimezone = T)

Next we compare the sensors data and weekly reference measurements (different than the rating curves which we will load later)

#In field observations
staff <- read_csv('data/raw/Staff_TS_27MAY21.csv') %>% 
  select(1:4) %>%
  mutate(datetime = mdy_hm(paste(date,time))) %>%
  dplyr::filter(!is.na(datetime)) %>%
  mutate(datetime = round_date(datetime,'15 mins'))

##attach to data from transducers
staff_sensor <- staff %>%
  inner_join(jumped,by = c('site','datetime')) %>%
  mutate(month = month(datetime))

#April onward comparison of staff vs. sensor----
##Our data quality improves after April 2019 so we will be using April 2019-April 2020 as our '1 calendar year'

start_date = mdy_hms('04/01/2019 00:00:00')
staff_sensor_april <- staff_sensor %>%
  dplyr::filter(datetime > start_date)

stage_v_sensor<- ggplot(staff_sensor_april,aes(x=staff_stage,y=jumped_stage,color = month)) + 
  geom_point() + 
  facet_wrap(~site,scales='free') + 
  stat_smooth(method = 'lm') + 
  labs(title= "After April 2019")+
  scale_color_viridis_c() + 
  xlab('Regla measurement') + 
  ylab('Sensor stage (jumped)')+
  theme_few()



# Sensor to staff conversions----

#creates a linear model using transducer data
staff_conversion <- staff_sensor_april %>%
  group_by(site) %>%
  nest() %>%
  mutate(mods = map(data,~lm(staff_stage ~ jumped_stage, data = .x)),
         #Model summaries
         mod_sum = map(mods,glance),
         mod_tidy = map(mods,tidy))

staff_summaries <- staff_conversion %>%
  unnest(mod_sum) %>%
  select(site,r.squared,p.value)

knitr::kable(staff_summaries)
site r.squared p.value
balas 0.9009185 0e+00
raices 0.8597792 2e-07
helado 0.9109396 0e+00
varsovia 0.7295712 0e+00

The outputs of this models are predictions where jumped stage is now converted to staff stage at the 0.05,0.5, and 0.95 confidence intervals. It will give us a sense of how uncertain our Q estimates are given our uncertainty in our staff relationships.

my.predict <- function(mods,sensor_data){
  out <- predict.lm(mods,sensor_data,interval='confidence')
  return(tibble(med_staff = out[,1] %>% as.numeric(.),
                min_staff = out[,2] %>% as.numeric(.),
                max_staff = out[,3] %>% as.numeric(.)))
}

jumped_staff <- jumped %>%
  group_by(site) %>%
  nest() %>%
  rename(sensor_data = data) %>%
  inner_join(staff_conversion,by = 'site') %>%
  mutate(preds = map2(mods,sensor_data,my.predict)) %>%
  select(-mods,-mod_sum,-mod_tidy,-data) %>%
  unnest(c(sensor_data,preds)) 

Next we load the ratings curves. See text for rating curve methods

q <- read_csv('data/raw/rating_curves_2018_2019.csv')  %>%  
  mutate(manual_stage = manual_stage * 100)

rating_curves<- ggplot(q,aes(x=manual_stage,y=q)) + 
  geom_point() + 
  facet_wrap(~site, scales = 'free') +
  theme_few()

rating_curves

Finally, we model discharge using the jumped staff dataframe and our rating curves. Note: This is the method used to determine daily discharge in Raices, Helado, Balas and Varsovia. The remaining two tributaries (Cacao and Yure) used manning’s equation.

## Likely shapes (assign log to natural sites and linear to channel)
functional_response <- tibble(site = c('balas','helado','raices','varsovia'),
                              response = c('log','log',
                                           'log','linear'))

#linear model for varsovia and log model for other 3 sites
man_q <- function(df){
  if(df$response[1] == 'log'){
    mod <- lm(log(q) ~ manual_stage, data = df)
  } else{
    mod <- lm(q ~ manual_stage, data = df)
  }
}

q_mods <- q %>%
  inner_join(functional_response) %>%
  group_by(site) %>%
  nest() %>%
  mutate(q_mods = map(data,man_q),
         #Model summaries
         q_sum = map(q_mods,glance),
         q_tidy = map(q_mods,tidy)) %>%
  inner_join(functional_response) 


q_summaries <- q_mods %>%
  unnest(q_sum) %>%
  select(site,r.squared,p.value,response)

knitr::kable(q_summaries)
site r.squared p.value response
balas 0.8795183 0.0017900 log
helado 0.7896066 0.0005879 log
raices 0.9142658 0.0002036 log
varsovia 0.9795350 0.0000000 linear
q.predict <- function(mods,sensor_data){
  out <- predict.lm(mods,sensor_data,interval='confidence')
  return(tibble(med_q = out[,1] %>% as.numeric(.),
                min_q = out[,2] %>% as.numeric(.),
                max_q = out[,3] %>% as.numeric(.)))
  
}



q_preds <- jumped_staff %>%
  rename(manual_stage = jumped_stage) %>%
  group_by(site) %>% 
  nest() %>%
  rename(sensor_data = data) %>%
  inner_join(q_mods,by = 'site') %>%
  mutate(preds = map2(q_mods,sensor_data,q.predict)) %>%
  select(-q_mods,-q_sum,-q_tidy,-data) %>%
  unnest(c(sensor_data,preds)) %>%
  mutate(if_any(ends_with('_q'), ~ ifelse(response == 'log',exp(.),.))) %>%
  dplyr::filter(datetime > mdy_hms('04/01/2019 00:00:00'),
                if_any(ends_with('_q'), ~ . < 10)) %>%
  mutate(date = as.Date(datetime)) 
save(q_preds, file = 'data/final/q.RData')